import requests
from lxml import etree
from pymongo import MongoClient

# Scrape one Ziroom listing page and store each listing's title, floor and
# location in the local MongoDB collection ziroom.z1.

# MongoDB target: database "ziroom", collection "z1" on the local server.
client = MongoClient('127.0.0.1', 27017)
collection = client['ziroom']['z1']

# Listing page to fetch.
url = 'http://cd.ziroom.com/z/'
# Browser-like request headers. The Cookie value is a hard-coded session
# snapshot -- NOTE(review): it will presumably go stale; confirm whether the
# site still accepts it.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
    'Cookie': 'CURRENT_CITY_CODE=510100; _csrf=iyJKvcRNjrnyskU-Jem9BNLerXUU7N1_; sajssdk_2015_cross_new_user=1; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%221764ab44a9213c-06004ac1dc407f-c791039-1327104-1764ab44a932de%22%2C%22%24device_id%22%3A%221764ab44a9213c-06004ac1dc407f-c791039-1327104-1764ab44a932de%22%2C%22props%22%3A%7B%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%7D; Hm_lvt_4f083817a81bcb8eed537963fc1bbf10=1607571098; Hm_lpvt_4f083817a81bcb8eed537963fc1bbf10=1607571098; gr_user_id=a531a726-bb4c-482d-a181-ffe3883b5d4b; gr_session_id_8da2730aaedd7628=eac2673e-3553-4179-a182-0b85461d1d51; gr_session_id_8da2730aaedd7628_eac2673e-3553-4179-a182-0b85461d1d51=true'
}

# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10
# 1-based position of the <div> that is always skipped. The original comment
# described this as removing an empty entry -- NOTE(review): presumably a
# non-listing placeholder in the page layout; confirm against the live page.
SKIPPED_POSITION = 5

r = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
# Fail loudly on an HTTP error instead of silently parsing an error page.
r.raise_for_status()
html = etree.HTML(r.text)
# One <div> per entry in the listing container.
div_list = html.xpath('/html/body/section/div[3]/div[2]/div')

# Documents are collected here and written in a single insert_many call
# rather than one database round-trip per listing.
records = []
for position, div_temp in enumerate(div_list, start=1):
    if position == SKIPPED_POSITION:
        continue

    # normalize-space() makes each query return a whitespace-trimmed string
    # (empty when the node is missing) instead of a list of nodes.
    title = div_temp.xpath('normalize-space(./div[2]/h5/a/text())')
    floor = div_temp.xpath('normalize-space(./div[2]/div[1]/div[1]/text())')
    # Inline style attribute of the price <span>; printed below but not stored.
    price_style = div_temp.xpath('normalize-space(./div[2]/div[2]/span[@class="num"]/@style)')
    location = div_temp.xpath('normalize-space(./div[2]/div[1]/div[2]/text())')

    # An entry with no extractable field would only add a blank document.
    if not (title or floor or location):
        continue

    # Document stored for this listing.
    record = {
        "title": title,
        "floor": floor,
        'position': location,
    }
    # Printed as a one-element list to keep the script's output format.
    print([record])
    records.append(record)
    print(title, floor, price_style, location)

# insert_many rejects an empty list, so only write when something was scraped.
if records:
    ret = collection.insert_many(records)
